
##############################
### UK BES data: create TS ###
##############################

rm(list = ls())
gc()

library(data.table)
library(BBmisc)
library(tidyverse)
library(haven)
library(survey)
library(ineq) 
library(stargazer)
library(lfe) 


### Set WD
setwd("~/Dropbox (Princeton)/Research/*Spatial_Inequality/paper_SMD_inequality/JoP_final/replication")  

### Load datasets
load("data/bes.RData")
load("data/pcon_covars.RData")

### Merge
bes[ , year := as.character(year) ]
pcon_covars[ , year := as.character(year) ]

d_bes <- merge(bes, pcon_covars, by=c("pcon_code","year"), all.x=T)


### Index of Deprivation
load("data/imd.RData")

# SD at MSO level
imd_sd1 <- imd[ , lapply(.SD, function(x) sd(x, na.rm=T) ) , by=c("MSOA11CD","year"), .SDcols="IncomeScorerate" ]
imd_sd2 <- imd[ , lapply(.SD, function(x) sd(x, na.rm=T) ) , by=c("MSOA11CD","year"), .SDcols="IndexofMultipleDeprivationIMDScore" ]

setnames(imd_sd1, "IncomeScorerate", "sd_IncomeScorerate_MSOA11")
setnames(imd_sd2, "IndexofMultipleDeprivationIMDScore", "sd_IndexofMultipleDeprivationIMDScore_MSOA11")

imd_sd <- merge(imd_sd1,imd_sd2,by=c("MSOA11CD","year"))
setnames(imd_sd, "MSOA11CD", "msoa11_code")

### Recode year for deprivation data only available in 2015
imd_sd[ year == 2015, year := 2016 ]
imd_sd[ , year := as.character(year) ]

### Merge
d_bes <- merge(d_bes, imd_sd, by=c("msoa11_code","year"), all.x=T)


### -----------------
### Prepare data
### -----------------

d_bes[ redistSelf == 10 , gvt_should_reduce_inequ := 1 ]
d_bes[ redistSelf == 9 , gvt_should_reduce_inequ := 2 ]
d_bes[ redistSelf == 8 , gvt_should_reduce_inequ := 3 ]
d_bes[ redistSelf == 7 , gvt_should_reduce_inequ := 4 ]
d_bes[ redistSelf == 6 , gvt_should_reduce_inequ := 5 ]
d_bes[ redistSelf == 5 , gvt_should_reduce_inequ := 6 ]
d_bes[ redistSelf == 4 , gvt_should_reduce_inequ := 7 ]
d_bes[ redistSelf == 3 , gvt_should_reduce_inequ := 8 ]
d_bes[ redistSelf == 2 , gvt_should_reduce_inequ := 9 ]
d_bes[ redistSelf == 1 , gvt_should_reduce_inequ := 10 ]
d_bes[ redistSelf == 0 , gvt_should_reduce_inequ := 11 ]

d_bes[ , gvt_should_reduce_inequ_norm := BBmisc::normalize(gvt_should_reduce_inequ, method="range", range=c(0,1)) ]

d_bes[ , p_edlevel ]
d_bes[ p_edlevel == 0 , educ := "no qualifications"]
d_bes[ p_edlevel == 1 , educ := "belowGCSs"]
d_bes[ p_edlevel == 2 , educ := "GCSE"]
d_bes[ p_edlevel == 3 , educ := "A-level"]
d_bes[ p_edlevel == 4 , educ := "undergraduate"]
d_bes[ p_edlevel == 5 , educ := "postgrad"]

d_bes[ , educ := factor(educ, levels=c("no qualifications","belowGCSs","GCSE","A-level","undergraduate","postgrad"))]

d_bes[ , homeowner := ifelse(p_housing %in% c(1,2,3), 1, 0) ]

d_bes[ p_gross_household <= 15 , p_gross_hh_inc := p_gross_household ]

d_bes[ , male := ifelse(gender == 1, 1, 0)]

d_bes[ , married := ifelse(p_marital %in% c(1,2), 1, 0) ]

d_bes[ , employed := ifelse(workingStatus %in% c(1,2,3), 1, 0) ]
d_bes[ , unemployed := ifelse(workingStatus %in% c(4), 1, 0) ]
d_bes[ , student := ifelse(workingStatus %in% c(5,6), 1, 0) ]
d_bes[ , retired := ifelse(workingStatus %in% c(7), 1, 0) ]

d_bes[ generalElectionVote %in% c("1"), vote_choice_full := "Tory"]
d_bes[ generalElectionVote %in% c("2"), vote_choice_full := "Labour"]
d_bes[ is.na(vote_choice_full), vote_choice_full := "other"]
d_bes[ generalElectionVote %in% c("0","9999"), vote_choice_full := NA]

d_bes[ , vote_choice_full := as.factor(vote_choice_full) ]
d_bes[ , vote_choice_full := relevel(vote_choice_full, "other") ]

d_bes[ , vote_tory := ifelse(generalElectionVote == 1, 1, 0)  ]
d_bes[ , vote_labour := ifelse(generalElectionVote == 2, 1, 0)  ]
d_bes[ , vote_ukip := ifelse(generalElectionVote %in% c(6,11,12), 1, 0)  ]
d_bes[ , vote_LibDems := ifelse(generalElectionVote == 3, 1, 0)  ]

### Constituency ethnicity
d_bes[ , ethnicity_non_british := ifelse(p_ethnicity != 1, 1, 0)]
d_bes[ , .N, ethnicity_non_british ]

ethn_count <- d_bes[ ,.N, by=c("pcon_code","year","ethnicity_non_british")]
ethn_count[ , count_all := lapply(.SD, function(x) sum(x) ) , by=c("pcon_code","year") , .SDcols="N" ]

ethn_count <- ethn_count[ ethnicity_non_british == 1 ]

ethn_count[ , share_non_white_BES := N / count_all ]

d_bes <- merge(d_bes, ethn_count[ ,.(pcon_code, year, share_non_white_BES) ], by=c("pcon_code","year"), all.x=T)

d_bes[ p_hh_children <= 5 , hh_children := p_hh_children ]

### Normalizes 
d_bes[ , sd_IncomeScorerate_MSOA11_norm := BBmisc::normalize(sd_IncomeScorerate_MSOA11, method="range", range=c(0,1)) ]
d_bes[ , sd_IndexofMultipleDeprivationIMDScore_MSOA11_norm := BBmisc::normalize(sd_IndexofMultipleDeprivationIMDScore_MSOA11, method="range", range=c(0,1)) ]

d_bes[ , gini_inc := lapply(.SD, function(x) ineq::ineq(x, na.rm=T, type="Gini") ) , .SDcols="p_gross_hh_inc" ]
d_bes[ , gini_inc_pcon := lapply(.SD, function(x) ineq::ineq(x, na.rm=T, type="Gini") ) , by=c("pcon_code") , .SDcols="p_gross_hh_inc" ]
d_bes[ , diff_mean_med_tot_inc_log := log(diff_mean_med_tot_inc) ]

d_bes[ , count_resp_pcon_yr := .N, ,.(pcon_code,year) ] 



### -----------------
### Plots 
### -----------------

### Distribution of inequality
d_bes_gini <- na.omit(d_bes[ gini_inc_pcon > 0 ,.(pcon,year,gini_inc,gini_inc_pcon) ])

### SI B.2
pdf("out/si_fig_b2.pdf", width=6, height=4) # BES_gini_distribution_v1.pdf
(plot <- (ggplot(data=d_bes_gini)
          + geom_density(aes(x=gini_inc, fill="across nation"), alpha=0.75)
          + geom_density(data=unique(d_bes_gini[ ,.(pcon,gini_inc_pcon) ]), 
                         aes(x=gini_inc_pcon, fill="across constituencies"), alpha=0.75)
          + geom_vline(data=d_bes_gini[ ,.(pcon,gini_inc) ], 
                       aes(xintercept = median(gini_inc, na.rm=T)), color="white", linetype="solid", linewidth=1.75)
          + geom_vline(data=d_bes_gini[ ,.(pcon,gini_inc) ], 
                       aes(xintercept = median(gini_inc, na.rm=T), colour = "across nation"), linetype="solid", linewidth=1.25)
          + geom_vline(data=unique(d_bes_gini[ ,.(pcon,gini_inc_pcon) ]), 
                       aes(xintercept = median(gini_inc_pcon, na.rm=T)), color="white", linetype="solid", linewidth=1.75)
          + geom_vline(data=unique(d_bes_gini[ ,.(pcon,gini_inc_pcon) ]), 
                       aes(xintercept = median(gini_inc_pcon, na.rm=T), colour = "across constituencies"), linetype="solid", linewidth=1.25)
          + theme_bw()
          + theme(legend.position="right", panel.border = element_rect(color="gray"), panel.grid = element_blank(),)
          + scale_color_brewer("Median", palette="Set1")
          + scale_fill_brewer("Distribution",palette="Set1")
          + ylab("Density")
          + xlab("Gini index") 
))
dev.off()



### SI Figure B.3
d_vote_gini2 <- d_bes[ ,.(pcon,year,vote_labour,vote_tory)]
d_vote_gini2[ , sd_vote_labour_pcon := lapply(.SD, function(x) sd(x, na.rm=T) ) , by=c("pcon") , .SDcols="vote_labour" ]
d_vote_gini2[ , sd_vote_tory_pcon := lapply(.SD, function(x) sd(x, na.rm=T) ) , by=c("pcon") , .SDcols="vote_tory" ]

d_vote_gini2[ !is.na(vote_labour) , gini_vote_labour_pcon := lapply(.SD, function(x) ineq::ineq(x, na.rm=T, type="Gini") ), by=c("pcon") , .SDcols="vote_labour" ]
d_vote_gini2[ !is.na(vote_tory) , gini_vote_tory_pcon := lapply(.SD, function(x) ineq::ineq(x, na.rm=T, type="Gini") ), by=c("pcon") , .SDcols="vote_tory" ]


pdf("out/si_fig_b3.pdf", width=6, height=4) # BES_gini_vote_concentration_v1.pdf
(plot <- (ggplot(data=d_vote_gini2[ gini_vote_labour_pcon > 0 & gini_vote_tory_pcon > 0 ])
          + geom_density(aes(x=gini_vote_labour_pcon, fill="Labour voters"), alpha=0.75)
          + geom_density(aes(x=gini_vote_tory_pcon, fill="Tory voters"), alpha=0.75)
          + geom_vline(data=d_vote_gini2[ ,.(pcon,gini_vote_labour_pcon) ], 
                       aes(xintercept = mean(gini_vote_labour_pcon, na.rm=T)), colour="white", linetype="solid", linewidth=1.75)
          + geom_vline(data=d_vote_gini2[ ,.(pcon,gini_vote_labour_pcon) ], 
                       aes(xintercept = mean(gini_vote_labour_pcon, na.rm=T), colour = "Labour voters"), linetype="solid", linewidth=1.25)
          + geom_vline(data=d_vote_gini2[ ,.(pcon,gini_vote_tory_pcon) ], 
                       aes(xintercept = mean(gini_vote_tory_pcon, na.rm=T)), colour="white", linetype="solid", linewidth=1.75)
          + geom_vline(data=d_vote_gini2[ ,.(pcon,gini_vote_tory_pcon) ], 
                       aes(xintercept = mean(gini_vote_tory_pcon, na.rm=T), colour = "Tory voters"), linetype="solid", linewidth=1.25)
          + theme_bw()
          + ylab("Density")
          + xlab("Concentration of votes (gini)")
          + theme(legend.position="right", panel.border = element_rect(color="gray"), panel.grid = element_blank(),)
          + scale_color_brewer("Mean", palette="Set1")
          + scale_fill_brewer("Distribution", palette="Set1")
))
dev.off()


### Figure 5a
d_bes[ classification_v1 %in% c("Village or smaller") , classification_v2 := "Village"]
d_bes[ classification_v1 %in% c("Small Town","Medium Town","Large Town") , classification_v2 := "Town"]
d_bes[ classification_v1 %in% c("Core City (outside London)","Other City") , classification_v2 := "City (outside London)"]
d_bes[ classification_v1 %in% c("Core City (London)") , classification_v2 := "London"]
d_bes[ , classification_v2 := factor(classification_v2, levels=c("Village","Town","City (outside London)","London")) ]

pdf("out/si_fig_5a.pdf", width=4.25, height=3.75) # BES_pref_urban_class_v1x.pdf
(plot <- (ggplot(data=d_bes[ !is.na(classification_v2) ], 
                 aes(y=gvt_should_reduce_inequ_norm, x=year, group=classification_v2, colour=classification_v2, shape=classification_v2, fill=classification_v2))
          + stat_summary(geom="line", linewidth=0.6)
          + stat_summary(geom="point", size=2, colour="gray20")
          + theme_bw()
          + ylab("Average support for redistribution")
          + xlab("Year")
          + theme(legend.position = c(0.32, 0.1), panel.border = element_rect(color="gray"), 
                  panel.grid = element_blank(),
                  legend.title = element_blank(), legend.spacing.x = unit(2, 'mm'),
                  legend.key.size = unit(2, "mm"), legend.text=element_text(size=8))
          + guides(colour = guide_legend(ncol=2, byrow=T))
          + scale_color_brewer("", palette="Reds")
          + scale_fill_brewer("", palette="Reds")
          + scale_shape_manual("", values=c(21:25))
          + coord_cartesian(ylim=c(0.49, 0.66)) 
))
dev.off()



### Figure 5b
bs_plot2 <- binsreg::binsreg(d_bes[ !is.na(pop_density) ]$gvt_should_reduce_inequ_norm, d_bes[ !is.na(pop_density) ]$pop_density)

bs_out2 <- as.data.table(bs_plot2$data.plot$`Group Full Sample`$data.dots)

setnames(bs_out2, old=c("x","fit"), new=c("pop_density","gvt_should_reduce_inequ_norm"))

pdf("out/si_fig_5b.pdf",  width=4.25, height=3.75) # BES_pref_density_v1x2.pdf
(plot <- (ggplot(data=d_bes[ !is.na(pop_density) ], aes(y=gvt_should_reduce_inequ_norm, x=pop_density))
          + geom_point(data=bs_out2, alpha=0.5)
          + stat_smooth(method="lm")
          + theme_bw()
          + xlab("Population density")
          + ylab("Average support for redistribution")
          + theme(legend.position="bottom", panel.grid = element_blank(), panel.border = element_rect(color="gray"))
))     
dev.off()




### -----------------
### Summary statistics 
### -----------------

d_bes_statistics <- na.omit(d_bes[ ,.(id, pcon_code, year, gvt_should_reduce_inequ_norm, male, age, educ, 
                                      p_gross_hh_inc, student, unemployed, retired, 
                                      married, hh_children, homeowner, vote_tory, vote_labour, 
                                      diff_mean_med_tot_inc_log, median_property_price, 
                                      share_work_in_services, total_mean_income)])

d_bes_statistics[ , median_property_price_log := log(median_property_price) ]
d_bes_statistics[ , total_mean_income_log := log(total_mean_income) ]

sum_stat_bes <- as.data.table(rbind(cbind("var" = "Support for redistribution", "mean" = mean(d_bes_statistics$gvt_should_reduce_inequ_norm), "sd" = sd(d_bes_statistics$gvt_should_reduce_inequ_norm), 
                                          "min" = min(d_bes_statistics$gvt_should_reduce_inequ_norm), "max" = max(d_bes_statistics$gvt_should_reduce_inequ_norm)),
                                    cbind("var" = "Gross household income", "mean" = mean(d_bes_statistics$p_gross_hh_inc), "sd" = sd(d_bes_statistics$p_gross_hh_inc), 
                                          "min" = min(d_bes_statistics$p_gross_hh_inc), "max" = max(d_bes_statistics$p_gross_hh_inc)),
                                    cbind("var" = "Number of children in household", "mean" = mean(d_bes_statistics$hh_children), "sd" = sd(d_bes_statistics$hh_children), 
                                          "min" = min(d_bes_statistics$hh_children), "max" = max(d_bes_statistics$hh_children)),
                                    cbind("var" = "Age", "mean" = mean(d_bes_statistics$age), "sd" = sd(d_bes_statistics$age), 
                                          "min" = min(d_bes_statistics$age), "max" = max(d_bes_statistics$age))))

sum_stat_bes[ , 2:5 := lapply(.SD, function(x) round(as.numeric(as.character(x)),2) ), .SDcols= 2:5 ]

### SI Table B.5
print(xtable::xtable(sum_stat_bes, include.colnames=T, caption="Summary statistics"), include.rownames=F)


### Shares
svy_d_bes_statistics <- svydesign(id=~id, weights=~1, data=d_bes_statistics)

tab.d1 <- data.table(prop.table(svytable( ~ male, svy_d_bes_statistics)))
tab.d2 <- data.table(prop.table(svytable( ~ educ, svy_d_bes_statistics)))
tab.d3 <- data.table(prop.table(svytable( ~ student, svy_d_bes_statistics)))
tab.d4 <- data.table(prop.table(svytable( ~ unemployed, svy_d_bes_statistics)))
tab.d5 <- data.table(prop.table(svytable( ~ retired, svy_d_bes_statistics)))
tab.d6 <- data.table(prop.table(svytable( ~ married, svy_d_bes_statistics)))
tab.d7 <- data.table(prop.table(svytable( ~ homeowner, svy_d_bes_statistics)))
tab.d8 <- data.table(prop.table(svytable( ~ vote_labour, svy_d_bes_statistics)))
tab.d9 <- data.table(prop.table(svytable( ~ vote_tory, svy_d_bes_statistics)))

names(tab.d1) <- c("Type","Share")
names(tab.d2) <- c("Type","Share")
names(tab.d3) <- c("Type","Share")
names(tab.d4) <- c("Type","Share")
names(tab.d5) <- c("Type","Share")
names(tab.d6) <- c("Type","Share")
names(tab.d7) <- c("Type","Share")
names(tab.d8) <- c("Type","Share")
names(tab.d9) <- c("Type","Share")

tab_bes <- as.data.table(rbind(tab.d1[ Type == 0 ], tab.d2, tab.d3[ Type == 1 ], tab.d4[ Type == 1 ], tab.d5[ Type == 1 ], 
                               tab.d6[ Type == 1 ], tab.d7[ Type == 1 ], tab.d8[ Type == 1 ], tab.d9[ Type == 1 ]))

list <- c("Female","No qualifications", "Less than GCSs","GCSE","A-level","Undergraduate","Postgrad","Student","Unemployed",
          "Retired","Married","Homeowner","Vote for Labour", "Vote for Tories")

tab_bes[ , variable := list ]

### SI Table B.4
print(xtable::xtable(tab_bes[ ,.(variable,Share) ], include.colnames=T, caption="Summary statistics"), include.rownames=F)



### Differences across constituencies
d_bes[ , const_ineq_type := ifelse(diff_mean_med_tot_inc > median(diff_mean_med_tot_inc, na.rm=T),"above_med_ineq","below_med_ineq") ]

# Shares
sum_stat1 <- svydesign(id=~id, weights=~1, data=na.omit(d_bes[ ! is.na(const_ineq_type) & vote_labour == 1 & const_ineq_type == "above_med_ineq" ,
                                                               .(id, year, gvt_should_reduce_inequ_norm, male, age, educ, p_gross_hh_inc, student, unemployed, retired, 
                                                                 married, hh_children, homeowner)]))

tab1.d1 <- data.table(prop.table(svytable( ~ male, sum_stat1)))
tab1.d2 <- data.table(prop.table(svytable( ~ educ, sum_stat1)))
tab1.d3 <- data.table(prop.table(svytable( ~ student, sum_stat1)))
tab1.d4 <- data.table(prop.table(svytable( ~ unemployed, sum_stat1)))
tab1.d5 <- data.table(prop.table(svytable( ~ retired, sum_stat1)))
tab1.d6 <- data.table(prop.table(svytable( ~ married, sum_stat1)))
tab1.d7 <- data.table(prop.table(svytable( ~ homeowner, sum_stat1)))

names(tab1.d1) <- c("Type","Share")
names(tab1.d2) <- c("Type","Share")
names(tab1.d3) <- c("Type","Share")
names(tab1.d4) <- c("Type","Share")
names(tab1.d5) <- c("Type","Share")
names(tab1.d6) <- c("Type","Share")
names(tab1.d7) <- c("Type","Share")

tab1.out <- as.data.table(rbind(tab1.d1[ Type == 0 ], tab1.d2, tab1.d3[ Type == 1 ], tab1.d4[ Type == 1 ], tab1.d5[ Type == 1 ], 
                                tab1.d6[ Type == 1 ], tab1.d7[ Type == 1 ]))

list1 <- c("Female","No qualifications", "Less than GCSs","GCSE","A-level","Undergraduate","Postgrad","Student","Unemployed",
           "Retired","Married","Homeowner")

tab1.out[ , variable := list1 ]

sum_stat2 <- svydesign(id=~id, weights=~1, data=na.omit(d_bes[ ! is.na(const_ineq_type) & vote_labour == 1 & const_ineq_type == "below_med_ineq" ,
                                                               .(id, year, gvt_should_reduce_inequ_norm, male, age, educ, p_gross_hh_inc, student, 
                                                                 unemployed, retired, married, hh_children, homeowner)]))

tab2.d1 <- data.table(prop.table(svytable( ~ male, sum_stat2)))
tab2.d2 <- data.table(prop.table(svytable( ~ educ, sum_stat2)))
tab2.d3 <- data.table(prop.table(svytable( ~ student, sum_stat2)))
tab2.d4 <- data.table(prop.table(svytable( ~ unemployed, sum_stat2)))
tab2.d5 <- data.table(prop.table(svytable( ~ retired, sum_stat2)))
tab2.d6 <- data.table(prop.table(svytable( ~ married, sum_stat2)))
tab2.d7 <- data.table(prop.table(svytable( ~ homeowner, sum_stat2)))

names(tab2.d1) <- c("Type","Share")
names(tab2.d2) <- c("Type","Share")
names(tab2.d3) <- c("Type","Share")
names(tab2.d4) <- c("Type","Share")
names(tab2.d5) <- c("Type","Share")
names(tab2.d6) <- c("Type","Share")
names(tab2.d7) <- c("Type","Share")

tab2.out <- as.data.table(rbind(tab2.d1[ Type == 0 ], tab2.d2, tab2.d3[ Type == 1 ], tab2.d4[ Type == 1 ], tab2.d5[ Type == 1 ], 
                                tab2.d6[ Type == 1 ], tab2.d7[ Type == 1 ]))

list2 <- c("Female","No qualifications", "Less than GCSs","GCSE","A-level","Undergraduate","Postgrad","Student","Unemployed",
           "Retired","Married","Homeowner")

tab2.out[ , variable := list2 ]

tab1_comb <- cbind(tab2.out[,.(variable,Share)],tab1.out[,.(Share)])
names(tab1_comb) <- c("variable","below median ineq", "above median inequ")


### Averages 
d1 <- na.omit(d_bes[ ! is.na(const_ineq_type) & vote_labour == 1 & const_ineq_type == "above_med_ineq" ,.(p_gross_hh_inc,age,hh_children) ])

tab1.1.out <- t(cbind("Gross household income (category)"=mean(d1$p_gross_hh_inc),"Age"=mean(d1$age), "Number of children in household"=mean(d1$hh_children)))

vars1 <- rownames(tab1.1.out)

tab1.1.out <- as.data.table(tab1.1.out)
tab1.1.out[ , vars := vars1 ]

d2 <- na.omit(d_bes[ ! is.na(const_ineq_type) & vote_labour == 1 & const_ineq_type == "below_med_ineq" ,.(p_gross_hh_inc,age,hh_children) ])

tab2.1.out <- t(cbind("Gross household income (category)"=mean(d2$p_gross_hh_inc),"Age"=mean(d2$age), "Number of children in household"=mean(d2$hh_children)))

vars2 <- rownames(tab2.1.out)

tab2.1.out <- as.data.table(tab2.1.out)
tab2.1.out[ , vars := vars2 ]

tab2_comb <- cbind(tab2.1.out[,.(vars,V1)],tab1.1.out[,.(V1)])
names(tab2_comb) <- c("variable","below median ineq", "above median inequ")

### Combine
tab12_comb <- rbind(tab1_comb,tab2_comb)

### SI Table B.7  
print(xtable::xtable(tab12_comb, include.colnames=T, caption="Summary statistics for Labour Voters"), include.rownames=F)



### -----------------
### Models 
### -----------------

### Support for redistribution
mod1.1 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

mod1.2 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner +  log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES
               | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

mod1.3 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES + log(diff_mean_med_tot_inc_UK) + as.numeric(year)
               | pcon_code | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

### Support for redistribution by party
mod2.1 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) 
               | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

mod2.2 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES
               | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

mod2.3 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                 married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES + log(diff_mean_med_tot_inc_UK) + as.numeric(year)
               | pcon_code | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])


### Table 2
stargazer(mod1.1, mod1.2, mod1.3, mod2.2, 
          digits.extra=0, digits=3, column.sep.width = "-20pt",
          align=T, no.space=T, omit.stat=c("ser","F"), type="text", 
          title = "Effect of Constituency-Level Inequality on Support for Redistribution", table.placement = "h!",
          keep = c("diff_mean_med_tot_inc_log","vote_choice_full"),
          covariate.labels = c("Constituency inequality (log)","Vote for Labour","Vote for Tories",
                               "Constituency inequality (log) $\\times$ Vote for Labour",
                               "Constituency inequality (log) $\\times$ Vote for Tories"),
          dep.var.labels = c("Support for redistribution"), 
          add.lines = list(c("Mean DV",
                             round(mean(mod1.1$fitted.values),3),
                             round(mean(mod1.2$fitted.values),3),
                             round(mean(mod1.3$fitted.values),3),
                             round(mean(mod2.2$fitted.values),3)),
                           c("Constituency FE","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Year FE","$\\checkmark$","$\\checkmark$","-","$\\checkmark$"),
                           c("Year trend","-","-","$\\checkmark$","-"),
                           c("Individual covariates","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Constituency covariates","-","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("UK-wide inequality","-","-","$\\checkmark$","-")))


### SI Table B.8
lab_cov2 <- c("Constituency inequality (log)","Vote for Labour","Vote for Tories",
              "Male","Age","Education: below GCSs","Education: GCSE", "Education: A-Level",
              "Education: undergraduate", "Education: postgraduate","Household gross income",
              "Student","Unemployed","Retired","Married","Children in household","Homeowner",
              "Median house price (log)","Share people with degree","Share people work in service sector","Mean income (log)",
              "Constituency Labour vote share", "Share non-white","UK inequality (log)",
              "Constituency inequality (log) $\\times$ Vote for Labour",
              "Constituency inequality (log) $\\times$ Vote for Tories")

stargazer(mod1.1, mod1.2, mod1.3, mod2.1, mod2.2, mod2.3,
          digits.extra=0, digits=3, column.sep.width = "-15pt",
          align=T, no.space=T, omit.stat=c("ser","F"), type="text", omit = c("as.numeric\\(year\\)"),
          covariate.labels = lab_cov2,
          title = "Effect of Constituency-Level Inequality on Support for Redistribution", table.placement = "h!",
          dep.var.labels = c("Support for redistribution"),
          add.lines = list(c("Mean DV",
                             round(mean(mod1.1$fitted.values),3),
                             round(mean(mod1.2$fitted.values),3),
                             round(mean(mod1.3$fitted.values),3),
                             round(mean(mod2.1$fitted.values),3),
                             round(mean(mod2.2$fitted.values),3),
                             round(mean(mod2.3$fitted.values),3)),
                           c("Constituency FE","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Year FE","$\\checkmark$","$\\checkmark$","-","$\\checkmark$","$\\checkmark$","-"),
                           c("Year trend","-","-","$\\checkmark$","-","-","$\\checkmark$")))



### -----------------
### Marginal effects
### -----------------

var.level.mod2.2 <- seq(min(d_bes[ , diff_mean_med_tot_inc_log ], na.rm=T),
                        max(d_bes[ , diff_mean_med_tot_inc_log ], na.rm=T), length.out = 25)

# Coefficients
estmean.mod2.2 <- mod2.2$coefficients
var.mod2.2 <- mod2.2$clustervcv

# Vote Labour 
rownames(mod2.2$coefficients)

marg.eff_mod2.2_L <- mod2.2$coefficients[2] + mod2.2$coefficients[24]*var.level.mod2.2

SEs_mod2.2_L <- rep(NA, length(var.level.mod2.2))
for (i in 1:length(var.level.mod2.2)){
  j <- var.level.mod2.2[i]
  SEs_mod2.2_L[i] <- msm::deltamethod (~  (x2) + (x24)*j , estmean.mod2.2, var.mod2.2, ses=T)
}

tab_mod2.2_L <- as.data.table(cbind(var.level=var.level.mod2.2, coef = marg.eff_mod2.2_L, se = SEs_mod2.2_L))
tab_mod2.2_L[ , type := "Vote for Labour" ]


# Vote Tories
rownames(mod2.2$coefficients)

marg.eff_mod2.2_T <- mod2.2$coefficients[3] + mod2.2$coefficients[25]*var.level.mod2.2

SEs_mod2.2_T <- rep(NA, length(var.level.mod2.2))
for (i in 1:length(var.level.mod2.2)){
  j <- var.level.mod2.2[i]
  SEs_mod2.2_T[i] <- msm::deltamethod (~  (x3) + (x25)*j , estmean.mod2.2, var.mod2.2, ses=T)
}

tab_mod2.2_T <- as.data.table(cbind(var.level=var.level.mod2.2, coef = marg.eff_mod2.2_T, se = SEs_mod2.2_T))
tab_mod2.2_T[ , type := "Vote for Tories" ]

# Combine
tab_mod2.2_comb <- rbind(tab_mod2.2_L, tab_mod2.2_T)

tab_mod2.2_comb[ , upper := coef + 1.96*se]
tab_mod2.2_comb[ , lower := coef - 1.96*se]


# Density plot
hist.out <- hist(d_bes[ , diff_mean_med_tot_inc_log ], breaks=80, plot=F)
n.hist <- length(hist.out$mids)
dist <- hist.out$mids[2]-hist.out$mids[1]
hist.max <- max(hist.out$counts)
yrange <- c(tab_mod2.2_comb[ , upper], tab_mod2.2_comb[ ,lower])
maxdiff<-(max(yrange)-min(yrange))

# Create data frame
hist_mod2.2 <- data.frame(ymin=rep(min(yrange)-maxdiff/5,n.hist),
                          ymax=hist.out$counts/hist.max*maxdiff/5+min(yrange)-maxdiff/5,
                          xmin=hist.out$mids-dist/2,
                          xmax=hist.out$mids+dist/2)

hist_mod2.2 <- as.data.table(hist_mod2.2)

tab_mod2.2_comb[ type == "Vote for Labour", type2 := "Labour"]
tab_mod2.2_comb[ type == "Vote for Tories", type2 := "Tory"]

pdf("out/fig6.pdf", width=5.5, height=3) # BES_marg_eff_ineq_red_vote_choice_v2.pdf
(plot <- (ggplot()
          + geom_rect(data=hist_mod2.2, aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax), fill="gray90", alpha=0.5, colour="gray70")
          + geom_hline(yintercept=0, colour = "gray50", linetype="dashed", linewidth=.5)
          + geom_ribbon(data=tab_mod2.2_comb, aes(y=coef, x=var.level, ymin=lower, ymax=upper, fill=type2), colour=NA, alpha=0.3, show.legend = F)
          + geom_line(data=tab_mod2.2_comb, aes(y=coef, x=var.level, colour=type2), linewidth=0.9, show.legend = F)
          + ylab("Marginal effect of vote choice\non support for redistribution")
          + xlab("Constituency mean-median inequality, log £")
          + theme_bw()
          + facet_wrap(~ type2)
          + scale_colour_manual("Vote choice", values=c("#DC241f","#0087DC"))
          + scale_fill_manual("Vote choice", values=c("#DC241f","#0087DC"))
          + theme(legend.position="right", panel.grid=element_blank(), panel.border = element_rect(color="gray"))
          
))
dev.off()



### -----------------
### Robustness: full sample
### -----------------

mod1x.1 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner | pcon_code + year | 0 | pcon_code, d_bes)

mod1x.2 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner +  log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                  vote_share_lab_PCON_FULL + share_non_white_BES
                | pcon_code + year | 0 | pcon_code, d_bes)

mod1x.3 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                  vote_share_lab_PCON_FULL + share_non_white_BES + log(diff_mean_med_tot_inc_UK) + as.numeric(year)
                | pcon_code | 0 | pcon_code, d_bes)

### Interaction models 
mod2x.1 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) 
                | pcon_code + year | 0 | pcon_code, d_bes)

mod2x.2 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                  vote_share_lab_PCON_FULL + share_non_white_BES
                | pcon_code + year | 0 | pcon_code, d_bes)

mod2x.3 <- felm(gvt_should_reduce_inequ_norm ~ diff_mean_med_tot_inc_log*vote_choice_full + male + age + educ + p_gross_hh_inc + student + unemployed + retired + 
                  married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                  vote_share_lab_PCON_FULL + share_non_white_BES + log(diff_mean_med_tot_inc_UK) + as.numeric(year)
                | pcon_code | 0 | pcon_code, d_bes)

### SI Table B.9
stargazer(mod1x.1, mod1x.2, mod1x.3, mod2x.1, mod2x.2, mod2x.3,
          digits.extra=0, digits=3, column.sep.width = "-15pt",
          align=T, no.space=T, omit.stat=c("ser","F"), type="text", omit = c("as.numeric\\(year\\)"),
          covariate.labels = lab_cov2,
          title = "Effect of Constituency-Level Inequality on Support for Redistribution, Full Sample", table.placement = "h!",
          dep.var.labels = c("Support for redistribution"),
          add.lines = list(c("Mean DV",
                             round(mean(mod1x.1$fitted.values),3),
                             round(mean(mod1x.2$fitted.values),3),
                             round(mean(mod1x.3$fitted.values),3),
                             round(mean(mod2x.1$fitted.values),3),
                             round(mean(mod2x.2$fitted.values),3),
                             round(mean(mod2x.3$fitted.values),3)),
                           c("Constituency FE","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Year FE","$\\checkmark$","$\\checkmark$","-","$\\checkmark$","$\\checkmark$","-"),
                           c("Year trend","-","-","$\\checkmark$","-","-","$\\checkmark$")))




### -----------------
### Alternative local inequality measures
### -----------------

mod3.1 <- felm(gvt_should_reduce_inequ_norm ~ sd_IncomeScorerate_MSOA11_norm + male + age + educ + p_gross_hh_inc + student + unemployed + retired +
                 married + hh_children + homeowner +  log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES
               | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

mod3.2 <- felm(gvt_should_reduce_inequ_norm ~ sd_IndexofMultipleDeprivationIMDScore_MSOA11_norm + male + age + educ + p_gross_hh_inc + student + unemployed + retired +
                 married + hh_children + homeowner + log(median_property_price) + share_with_degree + share_work_in_services + log(total_mean_income) + 
                 vote_share_lab_PCON_FULL + share_non_white_BES
               | pcon_code + year | 0 | pcon_code, d_bes[ count_resp_pcon_yr >= 50 ])

lab_cov3x <- c("SD Income scores","SD Multiple deprivation",
               "Male","Age","Education: below GCSs","Education: GCSE", "Education: A-Level", 
               "Education: undergraduate", "Education: postgraduate","Household gross income",
               "Student","Unemployed","Retired","Married","Children in household","Homeowner",
               "Median house price (log)","Share people with degree","Share people work in service sector","Mean income (log)",
               "Constituency Labour vote share", "Share non-white")

### SI Table B.10
stargazer(mod3.1, mod3.2,
          digits.extra=0, digits=3, column.sep.width = "-15pt",
          align=T, no.space=T, omit.stat=c("ser","F"), type="text",  
          covariate.labels = lab_cov3x,
          title = "Effect of Constituency-Level Inequality on Support for Redistribution, Smaller Local Level", table.placement = "h!",
          dep.var.labels = c("Support for redistribution"),
          add.lines = list(c("Mean DV",
                             round(mean(mod3.1$fitted.values),3),
                             round(mean(mod3.2$fitted.values),3)),
                           c("Constituency FE","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$"),
                           c("Year FE","$\\checkmark$","$\\checkmark$","$\\checkmark$","$\\checkmark$")))

